Let's review the MLB draft season.
Start by loading the packages.
suppressMessages({
library(tidyverse) #ggplot2 dplyr tibble tidyr purrr forecats
library(ggrepel) #automatically position non-overlapping text labels
library(glue) #interpreted literal strings
library(gt)
library(gtExtras)
library(paletteer)
library(mlbplotR)
})
Next, we load the team logos.
# Team metadata (colors, logos) for the MLB clubs only: the league-level
# entries "AL", "NL" and "MLB" are removed.
# NOTE(review): the helper columns below are recycled to exactly 30 rows, so
# this fails if the filtered table does not have 30 teams.
league_rows <- c("AL", "NL", "MLB")
teams_colors_logos <- mlbplotR::load_mlb_teams() %>%
  filter(!(team_abbr %in% league_rows)) %>%
  mutate(
    # a cycles 1..6; b counts down 5..1 in runs of six
    # (presumably column/row positions of a 6 x 5 logo grid -- confirm)
    a = rep(1:6, times = 5),
    b = rep(5:1, each = 6),
    # full opacity for abbreviations containing "A", 0.75 otherwise
    alpha = ifelse(grepl("A", team_abbr), 1, 0.75),
    # "b/w" for abbreviations containing "E", missing otherwise
    color = ifelse(grepl("E", team_abbr), "b/w", NA)
  )
Now let's load our draft data and clean it up.
# Build the pick-level `exposure` table: one row per drafted player per draft,
# enriched with team logo, player headshot and ADP-based value metrics, and
# ordered by pick number.
exposure <- read.csv("./data/exposure_mar30.csv") %>%
  mutate(
    # Refer to the column through data masking rather than `exposure$...`, so
    # the expression always sees the rows currently in the pipeline.
    Picked.At = as.Date(
      as.POSIXct(Picked.At, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
    ),
    name = paste(First.Name, Last.Name)
  ) %>%
  select(name, Team, Position, Picked.At, Pick.Number, Draft) %>%
  left_join(
    teams_colors_logos %>% select(team_abbr, team_logo_espn),
    by = c("Team" = "team_abbr")
  ) %>%
  # Player ids are matched on the full name.
  left_join(read.csv("./data/playerids.csv"), by = c("name" = "Name")) %>%
  # Ids that are not numeric become NA here (R warns about the coercion).
  mutate(playerid = as.double(playerid)) %>%
  # Keep a single row per player per draft in case the name join matched more
  # than one id. Deduplicating on the two columns directly avoids a pasted
  # key, which could collide because it had no separator.
  distinct(name, Draft, .keep_all = TRUE) %>%
  left_join(
    mlbplotR::load_headshots() %>%
      select(fangraphs_id, espn_headshot) %>%
      drop_na(fangraphs_id),
    by = c("playerid" = "fangraphs_id")
  ) %>%
  left_join(
    read.csv("./projections_season/rankings_mar20.csv") %>%
      mutate(
        name = paste(firstName, lastName),
        adp = as.numeric(adp)
      ) %>%
      select(name, adp, projectedPoints, positionRank),
    by = c("name")
  ) %>%
  mutate(
    # Positive value: the player was taken later than his ADP.
    value = Pick.Number - adp,
    rel_value = round(value / adp, digits = 2),
    # Keep only the capital letters of positionRank (drops the rank digits).
    positionGroup = gsub("[^A-Z]", "", positionRank)
  ) %>%
  # Picks without an ADP cannot be valued and are removed.
  drop_na(adp) %>%
  arrange(Pick.Number)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
Highest owned players
# Ten most frequently drafted players. `count` is the number of rows (picks)
# per player and `own` is that count divided by the number of distinct drafts.
exposure %>%
  group_by(name, espn_headshot) %>%
  # Drop the grouping explicitly instead of relying on summarise()'s default
  # regrouping (and its message) followed by a later ungroup().
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count)) %>%
  mutate(own = round(count / n_distinct(exposure$Draft), digits = 2)) %>%
  slice_head(n = 10) %>%
  gt() %>%
  gt_img_rows(columns = espn_headshot, height = 50) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'name'. You can override using the
## `.groups` argument.
| name | espn_headshot | count | own |
|---|---|---|---|
| Mark Canha | 20 | 0.28 | |
| J.D. Martinez | 19 | 0.26 | |
| Willson Contreras | 18 | 0.25 | |
| Jake Cronenworth | 17 | 0.24 | |
| Lourdes Gurriel Jr. | 17 | 0.24 | |
| Tommy Edman | 15 | 0.21 | |
| Aaron Nola | 14 | 0.19 | |
| Austin Hays | 14 | 0.19 | |
| Justin Turner | 14 | 0.19 | |
| Alek Manoah | 13 | 0.18 |
Let's look at the drafts grouped by date.
# Daily totals: number of picks, summed value and summed relative value,
# plus both sums expressed per pick (rounded to two decimals).
drafts_by_date <- exposure %>%
  group_by(Picked.At) %>%
  summarize(
    total_picks = n(),
    total_value = sum(value, na.rm = TRUE),
    total_rel_value = sum(rel_value, na.rm = TRUE)
  ) %>%
  mutate(
    value_per_pick = round(total_value / total_picks, digits = 2),
    rel_value_per_pick = round(total_rel_value / total_picks, digits = 2)
  )

# Render the daily summary as a dark-themed table
drafts_by_date %>%
  gt() %>%
  gt_theme_dark()
| Picked.At | total_picks | total_value | total_rel_value | value_per_pick | rel_value_per_pick |
|---|---|---|---|---|---|
| 2023-02-19 | 20 | -21.0 | 0.25 | -1.05 | 0.01 |
| 2023-02-21 | 172 | -399.5 | -0.65 | -2.32 | 0.00 |
| 2023-02-22 | 111 | 232.2 | 7.02 | 2.09 | 0.06 |
| 2023-02-23 | 177 | -26.8 | 0.50 | -0.15 | 0.00 |
| 2023-02-25 | 120 | 118.6 | 3.09 | 0.99 | 0.03 |
| 2023-02-26 | 20 | -29.3 | 0.04 | -1.46 | 0.00 |
| 2023-02-27 | 39 | -99.8 | 0.69 | -2.56 | 0.02 |
| 2023-02-28 | 40 | 1.4 | 0.11 | 0.04 | 0.00 |
| 2023-03-01 | 20 | 35.1 | 0.78 | 1.76 | 0.04 |
| 2023-03-02 | 20 | -13.2 | 0.10 | -0.66 | 0.00 |
| 2023-03-04 | 40 | 29.2 | 0.56 | 0.73 | 0.01 |
| 2023-03-05 | 20 | 107.0 | 1.36 | 5.35 | 0.07 |
| 2023-03-06 | 20 | 32.6 | -0.33 | 1.63 | -0.02 |
| 2023-03-08 | 80 | -27.1 | 0.22 | -0.34 | 0.00 |
| 2023-03-09 | 20 | -42.3 | 0.92 | -2.11 | 0.05 |
| 2023-03-11 | 7 | 21.0 | 0.50 | 3.00 | 0.07 |
| 2023-03-12 | 57 | 67.8 | 0.56 | 1.19 | 0.01 |
| 2023-03-13 | 16 | -84.2 | -0.50 | -5.26 | -0.03 |
| 2023-03-19 | 80 | 239.2 | 2.20 | 2.99 | 0.03 |
| 2023-03-20 | 40 | -32.9 | -0.07 | -0.82 | 0.00 |
| 2023-03-26 | 160 | 25.3 | 3.13 | 0.16 | 0.02 |
| 2023-03-27 | 20 | -167.4 | -1.04 | -8.37 | -0.05 |
| 2023-03-29 | 80 | -218.2 | -0.47 | -2.73 | -0.01 |
| 2023-03-30 | 59 | -279.9 | -0.70 | -4.74 | -0.01 |
Top 10 picks from all drafts in terms of relative value (`rel_value`, i.e. value divided by ADP)
# Ten individual picks with the largest relative value (value / ADP),
# shown with the team logo and player headshot rendered as images.
exposure %>%
  select(
    name, team_logo_espn, espn_headshot,
    Pick.Number, adp, value, rel_value, Picked.At
  ) %>%
  arrange(desc(rel_value)) %>%
  head(10) %>%
  gt() %>%
  gt_img_rows(columns = "team_logo_espn", height = 50) %>%
  gt_img_rows(columns = "espn_headshot", height = 50) %>%
  gt_theme_dark()
| name | team_logo_espn | espn_headshot | Pick.Number | adp | value | rel_value | Picked.At |
|---|---|---|---|---|---|---|---|
| Aaron Judge | 4 | 1.2 | 2.8 | 2.33 | 2023-02-22 | ||
| Juan Soto | 6 | 2.5 | 3.5 | 1.40 | 2023-03-26 | ||
| Ronald Acuña Jr. | 6 | 2.9 | 3.1 | 1.07 | 2023-03-09 | ||
| Aaron Judge | 2 | 1.2 | 0.8 | 0.67 | 2023-03-30 | ||
| Aaron Judge | 2 | 1.2 | 0.8 | 0.67 | 2023-02-21 | ||
| Juan Soto | 4 | 2.5 | 1.5 | 0.60 | 2023-02-27 | ||
| Julio Rodríguez | 8 | 5.4 | 2.6 | 0.48 | 2023-02-22 | ||
| Jarred Kelenic | 225 | 153.5 | 71.5 | 0.47 | 2023-02-23 | ||
| Shohei Ohtani | 7 | 4.8 | 2.2 | 0.46 | 2023-02-22 | ||
| Jarred Kelenic | 221 | 153.5 | 67.5 | 0.44 | 2023-03-04 |
team drafted
# Ten MLB teams with the most drafted players across all drafts.
exposure %>%
  group_by(Team, team_logo_espn) %>%
  # Drop the grouping explicitly instead of relying on summarise()'s default
  # regrouping (and its message) followed by a later ungroup().
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count)) %>%
  # The logo URL column is shown under the header "team"
  rename(team = team_logo_espn) %>%
  slice_head(n = 10) %>%
  gt() %>%
  gt_img_rows(columns = team) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'Team'. You can override using the
## `.groups` argument.
| Team | team | count |
|---|---|---|
| STL | 116 | |
| NYM | 95 | |
| SD | 89 | |
| LAD | 85 | |
| ATL | 79 | |
| LAA | 73 | |
| TOR | 68 | |
| NYY | 56 | |
| SEA | 56 | |
| HOU | 55 |
by position
# Number of picks at each listed position; `own` is the position's share of
# all picks (count divided by the total count).
exposure %>%
  count(Position, name = "count") %>%
  arrange(desc(count)) %>%
  mutate(own = round(count / sum(count), digits = 2)) %>%
  gt() %>%
  gt_theme_dark()
| Position | count | own |
|---|---|---|
| SP | 430 | 0.30 |
| RF | 171 | 0.12 |
| LF | 165 | 0.11 |
| 3B | 162 | 0.11 |
| 1B | 114 | 0.08 |
| CF | 102 | 0.07 |
| SS | 102 | 0.07 |
| 2B | 94 | 0.07 |
| C | 59 | 0.04 |
| DH | 39 | 0.03 |
Number of drafts in which each team appears (at least one player drafted from that team)
# For each MLB team, count the drafts in which at least one of its players
# was picked.
# NOTE(review): no position filter and no minimum stack size is applied, so
# pitchers and single-player teams are included. Confirm whether a
# batters-only, two-or-more filter was intended.
exposure %>%
  # One row per draft/team pair; how many players the pair has is irrelevant
  # here because only the number of drafts is counted below.
  distinct(Draft, Team, team_logo_espn) %>%
  # count() returns an ungrouped result, so gt() draws one flat table instead
  # of turning every team into its own row group.
  count(Team, team_logo_espn, name = "count") %>%
  arrange(desc(count)) %>%
  gt() %>%
  gt_img_rows(columns = team_logo_espn) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'Draft', 'Team'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'Team'. You can override using the
## `.groups` argument.
| team_logo_espn | count |
|---|---|
| STL | |
| 47 | |
| NYM | |
| 44 | |
| ATL | |
| 41 | |
| LAD | |
| 41 | |
| SD | |
| 40 | |
| TOR | |
| 38 | |
| LAA | |
| 36 | |
| MIL | |
| 35 | |
| HOU | |
| 34 | |
| TB | |
| 34 | |
| SEA | |
| 33 | |
| SF | |
| 33 | |
| MIN | |
| 32 | |
| PHI | |
| 32 | |
| BOS | |
| 29 | |
| CLE | |
| 29 | |
| NYY | |
| 29 | |
| BAL | |
| 26 | |
| ARI | |
| 25 | |
| MIA | |
| 24 | |
| CWS | |
| 23 | |
| KC | |
| 23 | |
| TEX | |
| 23 | |
| COL | |
| 18 | |
| CHC | |
| 17 | |
| PIT | |
| 17 | |
| DET | |
| 12 | |
| CIN | |
| 10 | |
| WSH | |
| 2 | |
| OAK | |
| 1 | |
Create the objects that will be merged into the drafts data frame.
# Order in which the position-group counts are written into the config code
ord <- c("P", "IF", "OF")

# Roster configuration per draft: the number of picks in each position group,
# concatenated in `ord` order and stored as a number (e.g. 677 = 6 P, 7 IF,
# 7 OF).
# NOTE(review): the code is only unambiguous while every count is a single
# digit; a leading zero is also lost by as.numeric().
exposure_config <- exposure %>%
  count(Draft, positionGroup, name = "count") %>%
  # Make sure every draft has a row for each group in `ord`, with 0 when no
  # such player was picked; otherwise a missing group silently shortens the
  # code and shifts the meaning of the remaining digits.
  complete(Draft, positionGroup = ord, fill = list(count = 0L)) %>%
  # Groups not listed in `ord` sort last within a draft
  arrange(Draft, factor(positionGroup, levels = ord)) %>%
  group_by(Draft) %>%
  summarise(
    config = as.numeric(paste0(count, collapse = "")),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# How many drafts used each roster configuration, most common first
exposure_config %>%
  count(config, name = "count") %>%
  arrange(desc(count))
## # A tibble: 9 × 2
## config count
## <dbl> <int>
## 1 677 27
## 2 686 18
## 3 587 11
## 4 776 8
## 5 578 3
## 6 767 2
## 7 586 1
## 8 676 1
## 9 965 1
# Stacked batters per draft: non-pitchers are counted per draft and MLB team,
# only teams with more than one batter are kept, and their counts are summed
# per draft. Drafts without any such team do not appear in the result.
exposure_batters <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  # Drop the grouping explicitly instead of relying on summarise()'s default
  # regrouping (and its message) followed by ungroup().
  summarise(batters = n(), .groups = "drop") %>%
  filter(batters > 1) %>%
  group_by(Draft) %>%
  summarise(batters = sum(batters)) %>%
  arrange(desc(batters))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Biggest stack per draft: the largest number of non-pitchers taken from a
# single MLB team within the draft.
exposure_big_stack <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  # Drop the grouping explicitly instead of relying on summarise()'s default
  # regrouping (and its message) followed by ungroup().
  summarise(batters = n(), .groups = "drop") %>%
  group_by(Draft) %>%
  summarise(big_stack = max(batters), .groups = "drop")
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Number of MLB teams per draft from which more than one non-pitcher was
# taken. Drafts without any such team do not appear in the result.
exposure_num_teams <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  # Drop the grouping explicitly instead of relying on summarise()'s default
  # regrouping (and its message) followed by ungroup().
  summarise(batters = n(), .groups = "drop") %>%
  filter(batters > 1) %>%
  group_by(Draft) %>%
  summarise(teams_stacked = n())
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Player taken with an overall pick number below 13 in each draft, stored
# under the column name `first_pick`.
# NOTE(review): the cutoff 13 presumably assumes 12-team drafts, i.e. exactly
# one such pick per draft -- confirm.
first_pick <- exposure %>%
  filter(Pick.Number < 13) %>%
  select(first_pick = name, Draft)
# One row per draft: pick count, summed value metrics, per-pick averages, the
# roster configuration and stacking summaries, the first pick, and a
# descriptive label (`file`) built from those fields.
drafts <- exposure %>%
  group_by(Draft) %>%
  summarize(
    total_picks = n(),
    total_value = sum(value),
    total_rel_value = sum(rel_value),
    # exposure is ordered by pick number, so this is the date of the draft's
    # latest pick
    Picked.At = last(Picked.At)
  ) %>%
  mutate(
    value_per_pick = round(total_value / total_picks, digits = 2),
    rel_value_per_pick = round(total_rel_value / total_picks, digits = 2),
    # rel_value has two decimals; rounding the sum removes floating-point
    # residue (e.g. 5.2e-18) that otherwise forces scientific notation in
    # the rendered table. Done after the per-pick figure is computed.
    total_rel_value = round(total_rel_value, digits = 2)
  ) %>%
  arrange(desc(rel_value_per_pick)) %>%
  left_join(exposure_config, by = c("Draft")) %>%
  left_join(exposure_batters, by = c("Draft")) %>%
  left_join(exposure_big_stack, by = c("Draft")) %>%
  left_join(exposure_num_teams, by = c("Draft")) %>%
  left_join(first_pick, by = c("Draft")) %>%
  mutate(
    # Drafts without any stacked team are missing from the stack summaries;
    # report them as 0 rather than NA (which would print as "NA" in the label).
    batters = coalesce(batters, 0L),
    teams_stacked = coalesce(teams_stacked, 0L),
    file = paste(Picked.At, config, teams_stacked, batters, big_stack, first_pick)
  )
# Show each draft's label next to its value totals and date
drafts[, c("file", "total_value", "total_rel_value", "Picked.At")] %>%
  gt()
| file | total_value | total_rel_value | Picked.At |
|---|---|---|---|
| 2023-02-22 587 5 11 3 Aaron Judge | 147.1 | 3.58000e+00 | 2023-02-22 |
| 2023-02-22 686 3 10 4 Mike Trout | 33.8 | 1.54000e+00 | 2023-02-22 |
| 2023-02-25 686 4 10 3 Shohei Ohtani | 120.5 | 1.62000e+00 | 2023-02-25 |
| 2023-03-05 686 2 8 5 Mookie Betts | 107.0 | 1.36000e+00 | 2023-03-05 |
| 2023-03-26 776 3 11 4 Juan Soto | -7.6 | 1.37000e+00 | 2023-03-26 |
| 2023-02-22 776 3 7 3 Julio Rodríguez | 46.1 | 1.18000e+00 | 2023-02-22 |
| 2023-02-23 677 4 11 3 Mookie Betts | 112.4 | 1.18000e+00 | 2023-02-23 |
| 2023-03-26 677 3 9 4 Mookie Betts | 98.8 | 1.10000e+00 | 2023-03-26 |
| 2023-03-08 686 6 13 3 Shohei Ohtani | 33.3 | 9.90000e-01 | 2023-03-08 |
| 2023-03-09 587 6 15 3 Ronald Acuña Jr. | -42.3 | 9.20000e-01 | 2023-03-09 |
| 2023-03-19 677 3 8 4 Juan Soto | 140.7 | 1.03000e+00 | 2023-03-19 |
| 2023-03-01 578 5 14 4 José Ramírez | 35.1 | 7.80000e-01 | 2023-03-01 |
| 2023-03-04 677 5 13 4 Julio Rodríguez | 70.6 | 7.90000e-01 | 2023-03-04 |
| 2023-03-29 677 5 12 4 Mike Trout | 27.1 | 9.00000e-01 | 2023-03-29 |
| 2023-02-22 587 4 10 3 Shohei Ohtani | 49.6 | 9.00000e-01 | 2023-02-22 |
| 2023-02-25 677 4 10 3 Mookie Betts | 57.0 | 8.40000e-01 | 2023-02-25 |
| 2023-03-26 686 4 10 3 Yordan Alvarez | -6.7 | 7.70000e-01 | 2023-03-26 |
| 2023-02-21 776 4 9 3 Aaron Judge | -50.4 | 5.90000e-01 | 2023-02-21 |
| 2023-02-23 677 4 11 5 Vladimir Guerrero Jr. | 67.6 | 6.40000e-01 | 2023-02-23 |
| 2023-02-25 677 4 9 3 Mike Trout | -3.2 | 6.40000e-01 | 2023-02-25 |
| 2023-03-12 686 6 13 3 Aaron Judge | 65.3 | 5.80000e-01 | 2023-03-12 |
| 2023-02-27 586 4 11 4 Juan Soto | -81.9 | 6.30000e-01 | 2023-02-27 |
| 2023-03-26 677 4 12 4 Julio Rodríguez | 44.1 | 3.50000e-01 | 2023-03-26 |
| 2023-02-23 686 3 12 5 Mookie Betts | 63.5 | 3.60000e-01 | 2023-02-23 |
| 2023-02-23 677 4 12 4 José Ramírez | 9.5 | 4.70000e-01 | 2023-02-23 |
| 2023-03-19 677 4 11 4 Trea Turner | 45.3 | 4.50000e-01 | 2023-03-19 |
| 2023-03-19 677 4 9 3 Shohei Ohtani | 69.6 | 4.80000e-01 | 2023-03-19 |
| 2023-03-29 677 3 11 5 Mike Trout | 5.8 | 3.20000e-01 | 2023-03-29 |
| 2023-03-12 686 3 12 6 Shohei Ohtani | 21.1 | 4.70000e-01 | 2023-03-12 |
| 2023-03-12 686 4 14 4 Shohei Ohtani | 10.7 | 3.00000e-01 | 2023-03-12 |
| 2023-03-19 686 3 10 4 Julio Rodríguez | -16.4 | 2.40000e-01 | 2023-03-19 |
| 2023-02-21 776 3 6 2 Trea Turner | -87.0 | 2.10000e-01 | 2023-02-21 |
| 2023-02-19 965 2 4 2 Kyle Tucker | -21.0 | 2.50000e-01 | 2023-02-19 |
| 2023-02-21 677 2 4 2 Mike Trout | -26.7 | 2.80000e-01 | 2023-02-21 |
| 2023-02-25 677 4 11 4 Aaron Judge | 7.0 | 2.00000e-01 | 2023-02-25 |
| 2023-02-23 677 5 13 3 José Ramírez | -4.7 | 2.30000e-01 | 2023-02-23 |
| 2023-02-28 677 2 9 7 José Ramírez | -10.5 | 2.10000e-01 | 2023-02-28 |
| 2023-02-21 776 1 6 6 Mike Trout | 0.9 | -7.00000e-02 | 2023-02-21 |
| 2023-03-29 776 4 10 4 Juan Soto | -58.7 | 5.20417e-18 | 2023-03-29 |
| 2023-02-26 677 4 11 3 José Ramírez | -29.3 | 4.00000e-02 | 2023-02-26 |
| 2023-03-26 686 3 8 4 Shohei Ohtani | 0.2 | 3.00000e-02 | 2023-03-26 |
| 2023-02-22 767 4 9 3 Vladimir Guerrero Jr. | -61.9 | -8.00000e-02 | 2023-02-22 |
| 2023-02-21 776 1 3 3 Kyle Tucker | 21.3 | 8.00000e-02 | 2023-02-21 |
| 2023-03-02 677 5 13 5 Yordan Alvarez | -13.2 | 1.00000e-01 | 2023-03-02 |
| 2023-03-20 587 3 12 6 Juan Soto | -20.5 | 9.00000e-02 | 2023-03-20 |
| 2023-02-23 587 3 11 4 Mike Trout | -25.1 | -1.00000e-02 | 2023-02-23 |
| 2023-03-08 587 4 11 5 Mookie Betts | -0.6 | -9.00000e-02 | 2023-03-08 |
| 2023-03-30 676 4 11 4 Aaron Judge | -92.8 | 3.00000e-02 | 2023-03-30 |
| 2023-03-26 677 4 11 4 Vladimir Guerrero Jr. | -31.2 | 8.00000e-02 | 2023-03-26 |
| 2023-02-27 587 4 12 5 Juan Soto | -17.9 | 6.00000e-02 | 2023-02-27 |
| 2023-02-28 677 3 9 5 José Ramírez | 11.9 | -1.00000e-01 | 2023-02-28 |
| 2023-02-25 587 4 12 4 Shohei Ohtani | -27.3 | 6.00000e-02 | 2023-02-25 |
| 2023-02-25 677 4 12 4 Shohei Ohtani | -35.4 | -2.70000e-01 | 2023-02-25 |
| 2023-02-21 767 3 6 2 Kyle Tucker | -51.7 | -1.90000e-01 | 2023-02-21 |
| 2023-03-04 587 5 14 5 Aaron Judge | -41.4 | -2.30000e-01 | 2023-03-04 |
| 2023-03-30 686 5 13 4 Trea Turner | -80.6 | -1.10000e-01 | 2023-03-30 |
| 2023-03-20 677 3 12 5 José Ramírez | -12.4 | -1.60000e-01 | 2023-03-20 |
| 2023-03-26 587 6 14 3 Mike Trout | -17.6 | -1.40000e-01 | 2023-03-26 |
| 2023-02-21 776 3 7 3 Shohei Ohtani | -13.2 | -3.60000e-01 | 2023-02-21 |
| 2023-03-06 686 4 11 3 Ronald Acuña Jr. | 32.6 | -3.30000e-01 | 2023-03-06 |
| 2023-02-22 677 3 10 5 José Ramírez | -76.0 | -4.10000e-01 | 2023-02-22 |
| 2023-03-08 686 4 12 4 Ronald Acuña Jr. | -32.5 | -3.80000e-01 | 2023-03-08 |
| 2023-03-08 686 4 11 4 Juan Soto | -27.3 | -3.00000e-01 | 2023-03-08 |
| 2023-03-26 686 3 8 4 Mookie Betts | -54.7 | -4.30000e-01 | 2023-03-26 |
| 2023-03-30 686 3 11 7 Yordan Alvarez | -106.5 | -6.20000e-01 | 2023-03-30 |
| 2023-02-23 587 4 11 4 Mike Trout | -82.5 | -5.10000e-01 | 2023-02-23 |
| 2023-02-23 578 3 8 4 Juan Soto | -77.8 | -7.00000e-01 | 2023-02-23 |
| 2023-03-13 677 5 14 3 Mike Trout | -92.5 | -7.90000e-01 | 2023-03-13 |
| 2023-03-27 677 4 8 2 Vladimir Guerrero Jr. | -167.4 | -1.04000e+00 | 2023-03-27 |
| 2023-02-22 686 3 6 2 Gerrit Cole | -101.8 | -9.10000e-01 | 2023-02-22 |
| 2023-02-23 578 3 12 5 Yordan Alvarez | -87.1 | -1.13000e+00 | 2023-02-23 |
| 2023-03-29 677 4 12 4 Paul Goldschmidt | -192.4 | -1.69000e+00 | 2023-03-29 |
# Split the picks into a list with one data frame per draft
exp_list <- split(exposure, exposure$Draft)

# Lookup table from the draft identifier to its descriptive label
name_mapping <- data.frame(
  old_names = drafts$Draft,
  new_names = drafts$file,
  stringsAsFactors = FALSE
)

# Position of every list name in the lookup table (NA when it has no entry)
name_indices <- match(names(exp_list), name_mapping$old_names)

# Replace each matched name with its label; unmatched names are left as-is
names(exp_list) <- ifelse(
  is.na(name_indices),
  names(exp_list),
  name_mapping$new_names[name_indices]
)